/* 
    Purpose: Using the 1990 Census, this file takes cleaned variables 
             from 6a and calculates average predicted father income 
             (i.e., "income scores") at various levels.

    Note: Income scores for non-working fathers are only made at the 
          preferred level of variation (occ x race x south).

    Creates: Income scores at occ, occ x race, occ x race x south,
             occ x race x region (4), and occ x race x south x edu
             levels. All output files have the prefix 
             "incomescores_fathers1990_"
*/
* Start from an empty session and move to the Census data folder.
clear
set more off
cd "$Mydirectory1/1_DataSources/CensusData/"

    * Load the fathers sample. Two constant columns are added: "tag" is
    * summed when the cell templates are built, and "number" is summed in
    * the collapse step to count observations per cell.
    use "./output/Census1990_fathers_ages30to50.dta", clear
    generate tag = 1
    generate number = 1
    
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

*******************
*** TEMPLATES
*******************

* Template 1: occupation
* One row per distinct fatheroccej value and no other variables.
preserve
    tempfile template_byocc

    collapse (rawsum) tag, by(fatheroccej)
    keep fatheroccej

    save `template_byocc'
restore

* Template 2: occupation x race
* Each occupation appears twice: once with race==1 and once with race==2.
preserve
    tempfile template_byocc_byr

    collapse (rawsum) tag, by(fatheroccej)

    /* expand marks the original rows with 0 and the copies with 1;
       adding 1 turns that marker into race codes 1 and 2 */
    expand 2, generate(race)
    replace race = race + 1

    keep fatheroccej race
    save `template_byocc_byr'
restore

* Template 3: occupation x race x south
* Each occupation appears for race 1/2 crossed with south_merge 0/1.
preserve
    tempfile template_byocc_byr_bys

    collapse (rawsum) tag, by(fatheroccej)

    /* Duplicate rows for race (shifted to 1/2), then again for the
       south indicator (left as 0/1) */
    expand 2, generate(race)
    replace race = race + 1

    expand 2, generate(south_merge)

    keep fatheroccej race south_merge
    save `template_byocc_byr_bys'
restore

* Template 4: occupation x race x region (4)
* Occupation code 99 is left out of this template.
preserve
    tempfile template_byocc_byr_byreg

    collapse (rawsum) tag, by(fatheroccej)
    keep if fatheroccej!=99

    expand 2, generate(race)
    replace race = race + 1

    /* Four copies of every occ x race row, numbered 1-4 within the
       pair to serve as the region code */
    expand 4
    bysort fatheroccej race: generate region_merge = _n

    drop tag
    save `template_byocc_byr_byreg'
restore

* Template 5: occupation x race x south x edu
* Occupation code 99 is left out of this template.
preserve
    tempfile template_byors_byedu

    collapse (rawsum) tag, by(fatheroccej)
    keep if fatheroccej!=99

    expand 2, generate(race)
    replace race = race + 1

    expand 2, generate(south_merge)

    /* Five copies of every occ x race x south row, numbered 1-5 within
       the triple to serve as the education code */
    expand 5
    bysort fatheroccej race south_merge: generate edu = _n

    drop tag
    save `template_byors_byedu'
restore

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

    * Income variables that are averaged within every cell
    global income_measures "inctot faminc HHinc"

*******************
*** COLLAPSE 
*******************

/* For each level of aggregation: average the income measures within cell,
   count the observations in the cell, and merge the result onto the
   matching template so that empty cells still appear (with a count of 0
   and missing averages). One tempfile per level is left behind. */
foreach x in byocc byocc_byr byocc_byr_bys byors_altwgt byocc_byr_byreg byors_byedu {

    * Variables that define a cell at this level
    if "`x'"=="byocc" {
        local cell "fatheroccej"
    }
    else if "`x'"=="byocc_byr" {
        local cell "fatheroccej race"
    }
    else if inlist("`x'", "byocc_byr_bys", "byors_altwgt") {
        local cell "fatheroccej race south_merge"
    }
    else if "`x'"=="byocc_byr_byreg" {
        local cell "fatheroccej race region_merge"
    }
    else if "`x'"=="byors_byedu" {
        local cell "fatheroccej race south_merge edu"
    }

    /* Weight and template. The alternative-weight run is the only one
       that does not use perwt, and it shares the occ x race x south
       template. */
    local weight "perwt"
    local template_name "`x'"
    if "`x'"=="byors_altwgt" {
        local weight "wgt1990_hhsizeadj"
        local template_name "byocc_byr_bys"
    }

    preserve

        * Occupation code 99 is excluded at the region and education levels
        if inlist("`x'", "byocc_byr_byreg", "byors_byedu") {
            drop if fatheroccej==99
        }
        summarize fatheroccej

        collapse (rawsum) number (mean) $income_measures [aw=`weight'], by(`cell')

        foreach c of global income_measures {
            rename `c' avg_`c'_1990_`x'
            label variable avg_`c'_1990_`x' "Coarse income score, average, 1990 using `c'"
        }

        tempfile incomescores
        save `incomescores'

        * Start from the full grid of cells and attach the computed scores
        use `template_`template_name''
        merge 1:1 `cell' using `incomescores', nogenerate

        * Cells that exist only in the template had no observations
        replace number = 0 if number==.
        label variable number "Number of obs in cell"
        rename number number_1990obs_`x'

        tempfile incomescores_`x'
        save `incomescores_`x''

    restore

}

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
*** IMPUTATIONS
****************

* Merge all templates together. Bring in template with largest # of cells first.
    use `incomescores_byors_byedu', clear

    /* The edu-level file has no occupation 99, so the only rows allowed
       to be unmatched here are the occupation-99 rows that come in from
       the occ x race x south file. */
    merge m:1 fatheroccej race south_merge using `incomescores_byocc_byr_bys'
    assert fatheroccej==99 if _merge!=3
    drop _merge

    /* Every remaining merge must match on all rows; assert(match) stops
       the do-file otherwise and nogenerate skips creating _merge. */
    merge m:1 fatheroccej race south_merge using `incomescores_byors_altwgt', assert(match) nogenerate

    merge m:1 fatheroccej race using `incomescores_byocc_byr', assert(match) nogenerate

    merge m:1 fatheroccej using `incomescores_byocc', assert(match) nogenerate

*Flag number of imputations at each level
    /* A cell is flagged (1) when its household-income score is missing
       before imputation; occupation 99 is always set to 0. */
    foreach level in byocc byocc_byr byocc_byr_bys byors_altwgt byors_byedu {
        generate flag_1990_`level' = (avg_HHinc_1990_`level'==. & fatheroccej!=99)
        label variable flag_1990_`level' "flag 1990 imputed cells at `level' level"
    }

    * Both weighting schemes must have the same set of empty cells
    assert flag_1990_byocc_byr_bys==flag_1990_byors_altwgt

 * Impute at occ x race level
 /* Missing race==2 scores are filled in with the occupation's race==1 score
    divided by the average race==1 / race==2 income ratio across occupations.

    The data in memory hold one row per occ x race x south x edu cell, so
    every occ x race value is repeated over several rows. Comparing each row
    with the row before it would (a) mostly compare a race with itself,
    giving ratios of exactly 1 that pull the average gap toward 1, and
    (b) when filling a run of missing rows one after another, divide by the
    ratio again on every row. The ratio is therefore computed once per
    occupation, and every missing race==2 row is filled directly from the
    occupation's race==1 value. */
    sort fatheroccej race 

    local threshold "1"
    local number_byr "number_1990obs_byocc_byr"
    local cond "fatheroccej!=99"
    local inc_byr "avg_inctot_1990_byocc_byr avg_HHinc_1990_byocc_byr avg_faminc_1990_byocc_byr"

    * Mark one row per occ x race cell so each cell enters the average once
    egen byte one_per_cell = tag(fatheroccej race)

    foreach c of local inc_byr {             
        * race==1 score and cell count, copied onto every row of the occupation
        egen double white_inc = max(cond(race==1, `c', .)), by(fatheroccej)
        egen double white_n   = max(cond(race==1, `number_byr', .)), by(fatheroccej)

        * Ratio only where both race cells have more than the threshold number of obs
        gen ratio = white_inc / `c' if one_per_cell & race==2 & `number_byr'>`threshold' & white_n>`threshold' 
        sum ratio [aw=`number_byr'], d //calculate average racial income gap across occupations
        local ratio2 = r(mean)

        replace `c' = white_inc / `ratio2' if `c'==. & race==2 & `cond'
        drop ratio white_inc white_n
    }
    drop one_per_cell

    * Impute at occ x race x south level
            /*Note: No imputations are necessary 
                    for fatheroccej==99. */

    * The two weighting schemes share cell counts, so keep only one copy
    assert number_1990obs_byocc_byr_bys == number_1990obs_byors_altwgt
    drop number_1990obs_byors_altwgt

    sort fatheroccej south_merge race 
    local threshold "1"  
    local number_byr_bys "number_1990obs_byocc_byr_bys"
    
    * "norm" = perwt-weighted scores, "adj" = alternative-weight scores
    foreach it in norm adj {

        if "`it'" == "norm" local inc_byr_bys "avg_inctot_1990_byocc_byr_bys avg_faminc_1990_byocc_byr_bys avg_HHinc_1990_byocc_byr_bys"
        if "`it'" == "adj"  local inc_byr_bys "avg_inctot_1990_byors_altwgt avg_faminc_1990_byors_altwgt avg_HHinc_1990_byors_altwgt"

        foreach c of local inc_byr_bys {             
            /* The data hold several rows per occ x race x south cell (one
               per edu level). Within an occ x south group sorted by race,
               most adjacent pairs therefore belong to the same race and
               would give a ratio of exactly 1, pulling the average gap
               toward 1. Take the ratio only on the row where race switches
               from 1 to 2, so each occ x south group contributes one
               race==1 / race==2 ratio. */
            by fatheroccej south_merge: gen white_black_ratio = `c'[_n-1]/`c'[_n] if race==2 & race[_n-1]==1 & `number_byr_bys'[_n-1]>`threshold' & `number_byr_bys'>`threshold' 

            foreach j in 0 1 {
                
                /*Calculate average racial income gap in nonsouth and 
                  south separately, across occupation cells */
                quietly sum white_black_ratio if south_merge==`j' [aw=`number_byr_bys'] 
                local ratio1 = r(mean)
                display `ratio1'
                
                levelsof fatheroccej if race==2 & south_merge==`j' & `c'==., local(occs1)

                /*Missing income for black respondents: rescale white income by 
                  average racial income gap in south or nonsouth, by occupation */
                foreach y in `occs1' {
                    display "occupation: `y'"
                    sum `c' if south_merge==`j' & race==1 & fatheroccej==`y'

                    * Nothing to rescale when the race==1 score is missing too
                    if `r(N)'>0 replace `c' = `r(mean)' / `ratio1' if south_merge==`j' & race==2 & fatheroccej==`y' 
                }
            }
            drop white_black_ratio
        }
    }

** Impute at occ x race x south x edu level
    sort fatheroccej race south_merge edu

    foreach c of global income_measures {

        * Ratio of the edu-specific score to the occ x race x south score
        generate edu_ratio = avg_`c'_1990_byors_byedu / avg_`c'_1990_byocc_byr_bys

        /* Observation-weighted mean of that ratio within each of the five
           edu levels, stored on the rows of that level */
        generate edu_scale = .
        forvalues i = 1/5 {
            summarize edu_ratio if edu==`i' [aw=number_1990obs_byors_byedu]
            replace edu_scale = `r(mean)' if edu==`i'
        }

        /* Empty edu cells: take the occ x race x south score and apply the
           average "premium" or "penalty" of that education level.
           Occupation 99 is never imputed. */
        replace avg_`c'_1990_byors_byedu = avg_`c'_1990_byocc_byr_bys * edu_scale if avg_`c'_1990_byors_byedu==. & fatheroccej!=99

        assert avg_`c'_1990_byors_byedu==. if fatheroccej==99

        drop edu_ratio edu_scale
    }


******************************************************
* SAVE (all levels except occ x race x region (4))
******************************************************

    /* Write one file per level. The data in memory are at the finest
       level, so each file keeps a single row per cell plus the variables
       that belong to that level. */
    foreach x in byocc byocc_byr byocc_byr_bys byors_byedu {

        * Variables that define a cell at this level
        if "`x'"=="byocc" {
            local cell "fatheroccej"
        }
        else if "`x'"=="byocc_byr" {
            local cell "fatheroccej race"
        }
        else if "`x'"=="byocc_byr_bys" {
            local cell "fatheroccej race south_merge"
        }
        else if "`x'"=="byors_byedu" {
            local cell "fatheroccej race south_merge edu"
        }

        * The occ x race x south file also carries the alternative-weight variables
        local extra ""
        if "`x'"=="byocc_byr_bys" local extra "*_altwgt"

        preserve

            if "`x'"=="byors_byedu" {
                drop if fatheroccej==99
            }

            bysort `cell': keep if _n==1
            keep number_1990obs_`x' `cell' avg*`x' flag*`x' `extra'

            /* After imputation no score may be missing, except that
               occupation 65 is exempted at the two finest levels */
            if inlist("`x'", "byocc_byr_bys", "byors_byedu") {
                assert avg_inctot_1990_`x'!=. if fatheroccej!=65
            }
            else {
                assert avg_inctot_1990_`x'!=.
            }
            if "`x'"=="byocc_byr_bys" {
                assert avg_inctot_1990_byors_altwgt!=. if fatheroccej!=65
            }

            summarize fatheroccej

            compress
            save ./output/incomescores_fathers1990_`x'.dta, replace

        restore

    }
    
    
* Impute at occ x race x region level
/* Same approach as the occ x race x south imputation, with the racial
   income gap averaged within each of the four regions. The result is
   saved to its own file. */

    foreach x in byocc_byr_byreg {
        local cell "fatheroccej race region_merge"

        * Merge all templates together. Bring in template with largest # of cells first.
        use `incomescores_byocc_byr_byreg', clear

        * Region code 3 is mapped to south_merge==1 for the merge below
        generate south_merge = (region_merge==3)

        /* The region file has no occupation 99, so occupation-99 rows from
           the coarser files are the only rows allowed to be unmatched */
        merge m:1 fatheroccej race south_merge using `incomescores_byocc_byr_bys'
        assert fatheroccej==99 if _merge!=3
        drop _merge

        merge m:1 fatheroccej race using `incomescores_byocc_byr'
        assert fatheroccej==99 if _merge!=3
        drop _merge

        merge m:1 fatheroccej using `incomescores_byocc'
        assert fatheroccej==99 if _merge!=3
        drop _merge

        *Flag number of imputations at region level
        generate flag_1990_byocc_byr_byreg = (avg_HHinc_1990_byocc_byr_byreg==. & fatheroccej!=99)
        label variable flag_1990_byocc_byr_byreg "flag 1990 imputed cells at byocc_byr_byreg level"
        capture noisily: duplicates report fatheroccej race region_merge if flag_1990_byocc_byr_byreg==1

        * Impute
        sort fatheroccej region_merge race

        local threshold "1"
        local n_reg "number_1990obs_byocc_byr_byreg"
        local inc_reg "avg_inctot_1990_byocc_byr_byreg avg_faminc_1990_byocc_byr_byreg avg_HHinc_1990_byocc_byr_byreg "

        foreach c of local inc_reg {
            /* Ratio of the previous row to the current row within an
               occ x region group sorted by race, only where both rows
               have more than the threshold number of observations */
            by fatheroccej region_merge: generate white_black_ratio = `c'[_n-1]/`c'[_n] if `n_reg'[_n-1]>`threshold' & `n_reg'>`threshold'

            forvalues j = 1/4 {

                /*Calculate average racial income gap by current region of residence,
                  across occupation cells */
                quietly summarize white_black_ratio if region_merge==`j' [aw=`n_reg']
                local region_gap = r(mean)
                display `region_gap'

                * Occupations (other than 99) whose race==2 score is missing in this region
                levelsof fatheroccej if race==2 & region_merge==`j' & `c'==. & fatheroccej!=99, local(missing_occs)

                /*Missing income for black respondents: rescale white income by
                  average racial income gap of region, by occupation */
                foreach y in `missing_occs' {
                    display "occupation: `y'"
                    summarize `c' if region_merge==`j' & race==1 & fatheroccej==`y'

                    * Nothing to rescale when the race==1 score is missing too
                    if r(N) > 0 {
                        replace `c' = `r(mean)' / `region_gap' if region_merge==`j' & race==2 & fatheroccej==`y'
                    }
                }
                assert `c'==. if fatheroccej==99
            }
            drop *ratio*
        }

        * Occupation 99 rows were only carried along by the merges
        drop if fatheroccej==99

        bysort `cell': keep if _n==1
        keep number_1990obs_`x' `cell' avg*`x' flag*`x'

        * Occupations 31 and 65 are allowed to remain missing
        assert avg_inctot_1990_`x'!=. if !inlist(fatheroccej,31,65)
        summarize fatheroccej

        compress
        save ./output/incomescores_fathers1990_`x'.dta, replace

    }